2018 Week 32 - US Wind Farm Locations
# Inspect the column names, types and first few values of the turbine data.
glimpse(us_wind)
Rows: 58,185
Columns: 24
$ case_id <dbl> 3073429, 3071522, 3073425, 3071569, 3005252, 3003…
$ faa_ors <chr> "missing", "missing", "missing", "missing", "miss…
$ faa_asn <chr> "missing", "missing", "missing", "missing", "miss…
$ usgs_pr_id <dbl> 4960, 4997, 4957, 5023, 5768, 5836, 4948, 5828, 4…
$ t_state <chr> "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "…
$ t_county <chr> "Kern County", "Kern County", "Kern County", "Ker…
$ t_fips <chr> "06029", "06029", "06029", "06029", "06029", "060…
$ p_name <chr> "251 Wind", "251 Wind", "251 Wind", "251 Wind", "…
$ p_year <dbl> 1987, 1987, 1987, 1987, 1987, 1987, 1987, 1987, 1…
$ p_tnum <dbl> 194, 194, 194, 194, 194, 194, 194, 194, 194, 194,…
$ p_cap <dbl> 18.43, 18.43, 18.43, 18.43, 18.43, 18.43, 18.43, …
$ t_manu <chr> "Vestas", "Vestas", "Vestas", "Vestas", "Vestas",…
$ t_model <chr> "missing", "missing", "missing", "missing", "miss…
$ t_cap <dbl> 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 9…
$ t_hh <dbl> -9999, -9999, -9999, -9999, -9999, -9999, -9999, …
$ t_rd <dbl> -9999, -9999, -9999, -9999, -9999, -9999, -9999, …
$ t_rsa <dbl> -9999, -9999, -9999, -9999, -9999, -9999, -9999, …
$ t_ttlh <dbl> -9999, -9999, -9999, -9999, -9999, -9999, -9999, …
$ t_conf_atr <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2…
$ t_conf_loc <dbl> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3…
$ t_img_date <chr> "1/1/2012", "1/1/2012", "1/1/2012", "7/31/2016", …
$ t_img_srce <chr> "NAIP", "NAIP", "NAIP", "Digital Globe", "Digital…
$ xlong <dbl> -118.3607, -118.3612, -118.3604, -118.3640, -118.…
$ ylat <dbl> 35.08378, 35.08151, 35.08471, 35.07942, 35.08559,…
# Number of turbines per state, largest first.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
us_wind %>%
  count(t_state, sort = TRUE)
# A tibble: 45 x 2
t_state n
<chr> <int>
1 TX 13232
2 CA 9037
3 IA 4280
4 OK 3821
5 KS 2898
6 IL 2602
7 MN 2547
8 CO 2278
9 OR 1868
10 WA 1744
# … with 35 more rows
# Number of turbines per project name, largest first.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
us_wind %>%
  count(p_name, sort = TRUE)
# A tibble: 1,479 x 2
p_name n
<chr> <int>
1 unknown Tehachapi Wind Resource Area 1 1831
2 Green Ridge Power 516
3 Stateline Wind Project 440
4 Mesa Wind Farm 432
5 Sky River 335
6 Cedar Creek 274
7 Peetz Table 267
8 Flat Ridge 2 261
9 Rolling Hills 259
10 Woodward Mountain I & II 242
# … with 1,469 more rows
# Quick map: one point per turbine, drawn over the US state borders.
us_wind %>%
  # keep longitudes below 100; the remainder is treated as an outlier
  filter(xlong < 100) %>%
  ggplot(mapping = aes(x = xlong, y = ylat)) +
  geom_point() +
  borders("state") +
  coord_map() +
  theme_void()
# Tabulate turbines per state by treating the state code as a factor.
count_states <- fct_count(factor(us_wind$t_state))
# Map of turbine locations after dropping Alaska, Hawaii, Guam and
# Puerto Rico, so only states covered by borders("state") remain.
us_wind %>%
  filter(!t_state %in% c("AK", "HI", "GU", "PR")) %>%
  ggplot(mapping = aes(x = xlong, y = ylat)) +
  geom_point() +
  borders("state") +
  coord_map() +
  labs(
    title = "Distribution of wind turbines in US",
    subtitle = "Most wind turbines are situated along middle of US.",
    caption = "Source: USGS.gov"
  ) +
  theme_void()
# Keep an untouched copy of the data as loaded.
us_wind_raw <- us_wind

# Drop Alaska, Hawaii, Guam and Puerto Rico, then turn the -9999 sentinel
# into NA in every numeric column.
# na_if() is applied column by column: calling it on the whole data frame
# with a scalar fails under dplyr >= 1.1.0, which casts `y` to the type
# of `x`.
us_wind_processed <- us_wind %>%
  filter(!t_state %in% c("AK", "HI", "GU", "PR")) %>%
  mutate(across(where(is.numeric), ~ na_if(.x, -9999)))
# One row per project (name + state): turbine count plus the mean and
# spread of the turbine coordinates.
# sd() is NA for projects with a single turbine.
wind_projects <- us_wind_processed %>%
  group_by(p_name, t_state) %>%
  summarise(
    turbines = n(),
    long = mean(xlong),
    lat = mean(ylat),
    long_sd = sd(xlong),
    lat_sd = sd(ylat)
  ) %>%
  # summarise() only peels off the last grouping level; drop the rest so
  # the result is a plain tibble, as in the later project summaries.
  ungroup()
# Map of projects: point colour and size both encode the turbine count.
# NOTE(review): the p_name counts printed earlier in this file rank
# "unknown Tehachapi Wind Resource Area 1" (1831 turbines) well above
# "251 Wind" -- confirm the subtitle's claim.
wind_projects %>%
  ggplot(aes(long, lat, col = turbines, size = turbines)) +
  # size is inherited from the plot-level aes(), so it is not repeated here
  geom_point(show.legend = TRUE) +
  scale_color_continuous(type = "viridis") +
  borders("state") +
  coord_map() +
  labs(
    title = "Distribution of projects in US",
    subtitle = "The biggest project is 251 Wind, in California",
    caption = "Source: usgs.gov"
  ) +
  theme_void()
To find out which project is the biggest:
# Turbines per project name and state.
# NOTE(review): rows come back in key order, not by size; pass
# sort = TRUE to count() to rank projects by turbine count.
count(us_wind_processed, p_name, t_state)
# A tibble: 1,440 x 3
p_name t_state n
<chr> <chr> <int>
1 251 Wind CA 190
2 30 MW Iowa DG Portfolio IA 10
3 6th Space Warning Squadron MA 2
4 Adair IA 76
5 Adams IA 64
6 Adams Wind Generations, LLC MN 12
7 AFCEE MMR Turbines MA 2
8 AG Land 1 IA 1
9 AG Land 2 IA 1
10 AG Land 3 IA 1
# … with 1,430 more rows
# One row per project (name + state) with start year, size, summed
# turbine capacity and the mean/spread of the turbine coordinates.
wind_projects <- us_wind_processed %>%
  group_by(p_name, t_state) %>%
  summarise(
    year = min(p_year, na.rm = TRUE), # first year project started
    turbines = n(),
    total_capacity_kw = sum(t_cap, na.rm = TRUE),
    lon = mean(xlong),
    lat = mean(ylat),
    lon_sd = sd(xlong), # NA for single-turbine projects
    lat_sd = sd(ylat)
  ) %>%
  # drop the leftover p_name grouping, matching the later summaries
  ungroup()
# Histogram of project start years (default binning).
ggplot(wind_projects, aes(x = year)) +
  geom_histogram(fill = "deepskyblue4") +
  labs(
    title = "Distribution of projects by year",
    subtitle = "Wind Turbine Projects gained momentum after 2000",
    caption = "Source: usgs.gov"
  ) +
  theme_clean()
# Map of projects: point size encodes turbine count, colour encodes the
# project's first year.
wind_projects %>%
  ggplot(aes(lon, lat, size = turbines, col = year)) +
  # size is inherited from the plot-level aes(), so it is not repeated here
  geom_point(show.legend = TRUE) +
  scale_color_continuous(type = "viridis") +
  borders("state") +
  coord_map() +
  labs(
    title = "Age and Scale of US Wind Turbine Projects",
    subtitle = "251 Wind in CA is the oldest project, and the newer projects are situated along middle of the country",
    caption = "Source: usgs.gov"
  ) +
  theme_void()
# How many distinct p_cap values (NA included) appear under each project
# name; n > 1 means the name carries more than one capacity figure.
us_wind_processed %>%
  distinct(p_name, p_cap) %>% # capacity
  count(p_name, sort = TRUE)
# A tibble: 1,425 x 2
p_name n
<chr> <int>
1 McNeilus 5
2 Bishop Hill I 3
3 Blue Summit 3
4 Capricorn Ridge 3
5 Capricorn Ridge expansion 3
6 Case Western University 3
7 Century Expansion 3
8 Crossroads 3
9 Crow Lake 3
10 Horse Hollow II 3
# … with 1,415 more rows
# Per-project summary (name + state), returned as an ungrouped tibble.
us_wind_processed %>%
  group_by(p_name, t_state) %>%
  summarise(
    year = min(p_year, na.rm = TRUE), # first year project started
    turbines = n(),
    total_capacity_kw = sum(t_cap, na.rm = TRUE),
    lon = mean(xlong),
    lat = mean(ylat),
    lon_sd = sd(xlong), # NA for single-turbine projects
    lat_sd = sd(ylat)
  ) %>%
  ungroup()
# A tibble: 1,440 x 9
p_name t_state year turbines total_capacity_… lon lat
<chr> <chr> <dbl> <int> <dbl> <dbl> <dbl>
1 251 Wind CA 1987 190 18050 -118. 35.1
2 30 MW Iowa DG… IA 2017 10 30000 -93.4 42.0
3 6th Space War… MA 2013 2 3360 -70.5 41.8
4 Adair IA 2008 76 174800 -94.7 41.5
5 Adams IA 2016 64 154284 -94.7 40.9
6 Adams Wind Ge… MN 2011 12 20040 -94.7 44.9
7 AFCEE MMR Tur… MA 2011 2 3000 -70.5 41.8
8 AG Land 1 IA 2012 1 1600 -93.3 42.2
9 AG Land 2 IA 2012 1 1600 -93.4 42.1
10 AG Land 3 IA 2012 1 1600 -93.4 42.1
# … with 1,430 more rows, and 2 more variables: lon_sd <dbl>,
# lat_sd <dbl>
# Per-project summary used for the capacity-per-turbine plot.
turbine <- us_wind_processed %>%
  group_by(p_name, t_state) %>%
  summarise(
    year = min(p_year, na.rm = TRUE), # first year project started
    turbines = n(),
    # No na.rm here: a project with any missing t_cap gets an NA total.
    # NOTE(review): presumably intentional, so that total / turbines is
    # not understated by turbines with unknown capacity -- confirm.
    total_capacity_kw = sum(t_cap),
    lon = mean(xlong),
    lat = mean(ylat),
    lon_sd = sd(xlong), # NA for single-turbine projects
    lat_sd = sd(ylat)
  ) %>%
  ungroup()
# Average capacity per turbine against project start year, with a linear
# trend line.
ggplot(turbine, aes(x = year, y = total_capacity_kw / turbines)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    title = "Change in Total Capacity per Turbine over Time",
    subtitle = "Total Capacity per Turbine increased over time",
    caption = "Source: usgs.gov"
  ) +
  theme_few()
# One row per turbine model: median specifications plus how many
# turbines and projects use the model, most widely used first.
# The -9999 sentinel was converted to NA upstream, so every median needs
# na.rm = TRUE; without it a single missing value makes the whole
# model's median NA. A model whose values are all missing still yields NA.
# NOTE(review): the output column `t_rsw` is computed from `t_rsa`; the
# name is kept as-is in case later code refers to it.
turbine_models <- us_wind_processed %>%
  group_by(t_model) %>%
  summarize(
    t_cap = median(t_cap, na.rm = TRUE), # turbine capacity (kW)
    t_hh = median(t_hh, na.rm = TRUE), # turbine hub height (m)
    t_rd = median(t_rd, na.rm = TRUE), # turbine rotor diameter (m)
    t_rsw = median(t_rsa, na.rm = TRUE), # turbine rotor swept area (m2)
    t_ttlh = median(t_ttlh, na.rm = TRUE), # turbine total height calculated (m)
    turbines = n(), # number of turbines
    projects = n_distinct(p_name) # number of projects
  ) %>%
  arrange(desc(projects))
# Scatter of median total height against median capacity, one point per
# turbine model.
ggplot(turbine_models, aes(x = t_ttlh, y = t_cap)) +
  geom_point() +
  labs(
    title = "Relationship between turbine height and capacity",
    subtitle = "Taller Turbines have higher capacity",
    x = "Turbine Total Height Calculated (m)",
    y = "Turbine Capacity (kW)"
  ) +
  theme_clean()
https://www.youtube.com/watch?v=O1oDIQV6VKU&list=PL19ev-r1GBwkuyiwnxoHTRC8TTqP8OEi8&index=78
For attribution, please cite this work as
lruolin (2021, June 18). pRactice corner: Tidy Tuesday Series. Retrieved from https://lruolin.github.io/myBlog/posts/20210619_Tidytuesday wind data/
BibTeX citation
@misc{lruolin2021tidy, author = {lruolin, }, title = {pRactice corner: Tidy Tuesday Series}, url = {https://lruolin.github.io/myBlog/posts/20210619_Tidytuesday wind data/}, year = {2021} }